Kelompok 4 Paralel 2
| Nama | NIM |
|---|---|
| Angga Fathan Rofiqy | G1401211006 |
| Gladys Adya Zafira | G1401211014 |
| Kheni Hikmah Lestari | G1401211029 |
Bagian A
Poin 1
Bangkitkan sebaran Normal, Seragam, dan Eksponensial dengan ukuran contoh 2, 5, 25
Berikut merupakan hasil pembangkitan masing-masing sebaran dengan ukuran contoh 2, 5, 25:
Normal
# Sample sizes shared by the three distributions generated in Part A.
sample_size <- c(2, 5, 25)
# One standard-normal sample per size; lapply() hands each size to rnorm() as n.
norm_samples <- lapply(X = sample_size, FUN = rnorm)
print(norm_samples)
## [[1]]
## [1] 0.6618399 -1.0396015
##
## [[2]]
## [1] -0.1133533 -0.7842103 -1.1207241 0.8358447 1.3170687
##
## [[3]]
## [1] -0.85391134 -0.19806574 0.17501876 0.28681661 -1.52544801 1.54175377
## [7] 0.63749947 -0.52997639 0.51018725 -0.40586468 -0.35455015 0.06216746
## [13] 0.63241547 1.18121203 -0.42685744 -0.48964798 -0.80914599 -0.69573478
## [19] -0.77911974 0.20524301 -2.18057309 -0.20495423 -0.43196927 -2.39654053
## [25] 0.21454434
Sebaran Normal dengan ukuran contoh (n) 2, 5, 25
Seragam
# One uniform sample per size in sample_size; runif() is called with its
# default bounds because only n is supplied.
unif_samples <- lapply(X = sample_size, FUN = runif)
print(unif_samples)
## [[1]]
## [1] 0.2366930 0.1089945
##
## [[2]]
## [1] 0.79864044 0.08639965 0.27637487 0.66042945 0.56820341
##
## [[3]]
## [1] 0.008772071 0.672332964 0.309703747 0.449249855 0.033203530 0.873477992
## [7] 0.321186572 0.013921430 0.325077242 0.295558050 0.529569712 0.089710566
## [13] 0.181532595 0.830026652 0.725876963 0.165027744 0.123632807 0.302486273
## [19] 0.969135549 0.775867797 0.751753086 0.901700803 0.717343746 0.807527856
## [25] 0.099714373
Sebaran Seragam dengan ukuran contoh (n) 2, 5, 25
Eksponensial
# One exponential sample per size in sample_size; rexp() is called with its
# default rate because only n is supplied.
exp_samples <- lapply(X = sample_size, FUN = rexp)
print(exp_samples)
## [[1]]
## [1] 1.826871 1.079528
##
## [[2]]
## [1] 0.1891795 0.1219233 0.6445922 0.2931478 1.5982774
##
## [[3]]
## [1] 3.34190425 1.39144382 1.14366010 4.42045024 0.64891298 1.78578419
## [7] 0.32983693 3.83352017 3.57196360 1.22980552 0.14894666 1.18151911
## [13] 0.84442226 1.38289438 1.23004145 1.59505133 0.06519184 2.08604324
## [19] 0.24748579 0.56882112 0.58466625 0.27622625 2.58888012 0.13200431
## [25] 0.92176948
Sebaran Eksponensial dengan ukuran contoh (n) 2, 5, 25
Poin 2
Buat histogram dari rataan contohnya
# Compute the mean of each generated sample (one mean per sample size).
# vapply() is used instead of sapply() so every result is guaranteed to be a
# plain numeric vector with exactly one value per sample.
norm_means <- vapply(norm_samples, mean, numeric(1))
unif_means <- vapply(unif_samples, mean, numeric(1))
exp_means <- vapply(exp_samples, mean, numeric(1))
Rata-rata contoh masing-masing sebaran:
Normal
# Show the means of the normal samples (one value per sample size).
print(norm_means)
## [1] -0.18888083 0.02692514 -0.27342005
Seragam
# Show the means of the uniform samples (one value per sample size).
print(unif_means)
## [1] 0.1728438 0.4780096 0.4509356
Eksponensial
# Show the means of the exponential samples (one value per sample size).
print(exp_means)
## [1] 1.453199 0.569424 1.422050
Histogram dari rata-rata contoh
# Draw the three histograms of sample means side by side.
# par() returns the previous settings; they are restored afterwards so the
# 1 x 3 layout does not leak into plots drawn later in the session.
# NOTE(review): each *_means vector holds only one mean per sample size, so
# every histogram is built from three values; confirm this is the intended
# "histogram of the sample means".
old_par <- par(mfrow = c(1, 3))
hist(norm_means, main = "Histogram Mean Normal",
     xlab = "Rata-rata", ylab = "Frekuensi", col = "#8DC16D")
hist(unif_means, main = "Histogram Mean Seragam",
     xlab = "Rata-rata", ylab = "Frekuensi", col = "#4493CA")
# Title spelling corrected from "Exponensial".
hist(exp_means, main = "Histogram Mean Eksponensial",
     xlab = "Rata-rata", ylab = "Frekuensi", col = "#D44155")
par(old_par)
Poin 3
Buat normal qq-plot dari masing-masing n
# Normal QQ plots of the sample means for each distribution, side by side,
# each with a red reference line from qqline().
# par() returns the previous settings; they are restored afterwards so the
# 1 x 3 layout does not leak into plots drawn later in the session.
old_par <- par(mfrow = c(1, 3))
qqnorm(norm_means, main = "Normal QQ Plot", col = "#8DC16D", lwd = 7,
       xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(norm_means, col = "red", lwd = 2)
qqnorm(unif_means, main = "Uniform QQ Plot", col = "#4493CA", lwd = 7,
       xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(unif_means, col = "red", lwd = 2)
# Title spelling corrected from "Exponensial".
qqnorm(exp_means, main = "Exponential QQ Plot", col = "#D44155", lwd = 7,
       xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(exp_means, col = "red", lwd = 2)
par(old_par)
Bagian B
Poin 1
Bangkitkan dua gugus data
Data dari sebaran Normal
Data campuran:
50% dari sebaran normal + 50% dari sebaran chi-square
50% sebaran chi-square dengan parameter a + 50% sebaran chi-square dengan parameter b
25% sebaran chi-square dengan parameter a + 25% sebaran chi-square dengan parameter b + 25% sebaran normal dengan parameter a + 25% sebaran normal dengan parameter b
Normal
# Data set 1: 1000 draws from the standard normal distribution
# (rnorm() defaults to mean 0 and standard deviation 1).
norm_data <- rnorm(n = 1000)
hist(
  x = norm_data,
  col = "#4664AF",
  main = "Histogram Normal",
  xlab = "Nilai",
  ylab = "Frekuensi"
)
Campuran 1
50% dari sebaran normal + 50% dari sebaran chi-square
# Mixture 1: the first 500 normal values followed by the first 500 of
# 1000 chi-square (df = 1) draws.
csq_data <- rchisq(n = 1000, df = 1)
first_half <- seq_len(500)
mixed_data_1 <- c(norm_data[first_half], csq_data[first_half])
hist(
  x = mixed_data_1,
  col = "#9F4274",
  main = "Histogram Campuran 1",
  xlab = "Nilai",
  ylab = "Frekuensi"
)
Campuran 2
50% sebaran chi-square dengan parameter a + 50% sebaran chi-square dengan parameter b
# Mixture 2: 500 chi-square draws with df = 1 followed by 500 with df = 2.
csq_data_a <- rchisq(n = 500, df = 1)
csq_data_b <- rchisq(n = 500, df = 2)
mixed_data_2 <- append(csq_data_a, csq_data_b)
hist(
  x = mixed_data_2,
  col = "#66C8CC",
  main = "Histogram Campuran 2",
  xlab = "Nilai",
  ylab = "Frekuensi"
)
Campuran 3
25% sebaran chi-square dengan parameter a + 25% sebaran chi-square dengan parameter b + 25% sebaran normal dengan parameter a + 25% sebaran normal dengan parameter b
# Mixture 3: four components of 250 values each, so every component
# contributes exactly 25% of the 1000 observations.
norm_data_a <- rnorm(250, mean = 0, sd = 1)
norm_data_b <- rnorm(250, mean = 1, sd = 1)
# csq_data_a and csq_data_b hold 500 values each; using them whole would give
# a 1500-value mixture weighted 33/33/17/17%, so only the first 250 of each
# are taken.
quarter <- seq_len(250)
mixed_data_3 <- c(
  csq_data_a[quarter],
  csq_data_b[quarter],
  norm_data_a,
  norm_data_b
)
hist(mixed_data_3, main = "Histogram Campuran 3",
     xlab = "Nilai", ylab = "Frekuensi", col = "#F8D054")
Poin 2
- Ambil sample dengan ukuran n = 4, 12, 20, 60, 100
- Buat histogram dan normal qq-plot
- Pada n berapa sebaran rataan dari masing-masing data mulai simetris atau mendekati sebaran normal?
# Subsample sizes (n) used when drawing from each mixed data set below.
sample_sizes <- c(4, 12, 20, 60, 100)
#' Build a histogram and a normal QQ plot for one numeric vector
#'
#' @param data Numeric vector of observations to plot.
#' @param title Title text shown above the histogram.
#' @param color Colour used for the histogram bars, the density overlay and
#'   the QQ points.
#' @return An unnamed list of two ggplot objects: the histogram (with a
#'   count-scaled density overlay) and the normal QQ plot.
plot_distribution <- function(data, title, color) {
  df <- data.frame(x = data)

  # Histogram. The density is rescaled to counts so it overlays the bars;
  # after_stat(count) replaces the `..count..` notation deprecated in
  # ggplot2 3.4.0.
  p1 <- ggplot(data = df, aes(x = x)) +
    geom_histogram(bins = 20, fill = color, alpha = 1) +
    geom_density(aes(y = after_stat(count)), fill = color, alpha = 0.4) +
    labs(title = paste0("\n\n", title)) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 22, face = "bold", hjust = 3)
    )

  # QQ plot. The title consists only of newlines, which reserves blank title
  # lines above the panel.
  p2 <- ggplot(data = df, aes(sample = x)) +
    stat_qq(color = color, size = 5) +
    stat_qq_line(color = "red", linewidth = 2) +
    labs(title = "\n\n\n\n") +
    theme_minimal()

  list(p1, p2)
}
# Build a histogram and a normal QQ plot for one random subsample of each
# mixed data set at every sample size, then arrange them in a grid with one
# row per sample size (3 data sets x 2 plots = 6 columns).
# NOTE(review): each pair of panels shows a single subsample of size n, not
# the distribution of sample means over repeated draws; confirm this matches
# the question being answered.

# Data sets and their colours do not depend on the sample size, so they are
# defined once outside the loops.
samples <- list(
  "Mixed Data 1" = mixed_data_1,
  "Mixed Data 2" = mixed_data_2,
  "Mixed Data 3" = mixed_data_3
)
colors <- c("#4664AF", "#9F4274", "#66C8CC")

# Preallocate: every (sample size, data set) pair contributes two plots.
all_plots <- vector("list", 2 * length(sample_sizes) * length(samples))
slot <- 1
for (size in sample_sizes) {
  for (i in seq_along(samples)) {
    title <- names(samples)[i]
    # Named `drawn` rather than `sample` so base::sample() is not masked.
    drawn <- sample(samples[[i]], size, replace = FALSE)
    plots <- plot_distribution(
      drawn,
      paste0(title, " (n=", size, ")"),
      colors[i]
    )
    all_plots[[slot]] <- plots[[1]]
    all_plots[[slot + 1]] <- plots[[2]]
    slot <- slot + 2
  }
}
plot_grid(plotlist = all_plots, ncol = 6)
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.